'''
Created on Sep 4, 2013

@author: LONG HOANG GIANG
'''
from lxml.html import clean
import lib
import lxml
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
# 
# data = Web.load("http://www.hdvietnam.com/diendan/33-fshare-vn/675745-hanh-dong-kinh-di-vien-tuong.html#post5853501")
# print data.get_header()
# print data.get_cookie()
# 
# print data.get_article_content()

# tree = lib.Web.load('http://alobooks.vn/doc-online/55580/qua-yeu-chuong-01.html').build_tree()
# node = tree.xpath("//div[@class='content node-book']")
# print lib.stringify(node)
# 
# cleaner = clean.Cleaner()
# cleaner.javascript = True
# cleaner.style = True
# print lxml.html.parse('http://dantri.com.vn')

def ctext(el):
    '''
    Return the text of ``el`` and all of its descendants as a single string.

    The element's own text, the recursively collected text of every child and
    each child's tail text are concatenated in document order.  Children whose
    tag is ``tr``, ``td`` or ``table`` keep their opening and closing tag in
    the output (without attributes); every other tag is dropped and only its
    text is kept.

    Comments and processing instructions are skipped: their own content is
    not part of the visible text, but the tail text that follows them is
    still collected because it belongs to the parent element.

    @param el: an element exposing the ElementTree API (``text``, ``tail``,
               ``tag`` and iteration over children).
    @return: the collected text with leading/trailing whitespace removed.
             Note that the result of every nested call is stripped as well.
    '''
    kept_tags = ("tr", "td", "table")
    parts = []
    if el.text:
        parts.append(el.text)
    for child in el:
        # Comment / processing-instruction nodes carry a factory function in
        # ``tag`` instead of a string; their ``text`` is the comment body and
        # must not leak into the extracted text.
        if not callable(child.tag):
            inner = ctext(child)
            if child.tag in kept_tags:
                parts.append("<%s>" % child.tag)
                parts.append(inner)
                parts.append("</%s>" % child.tag)
            else:
                parts.append(inner)
        # The tail sits after the child's closing tag, so it is always kept.
        if child.tail:
            parts.append(child.tail)
    return "".join(parts).strip()
# 
# html = lib.Web.load('http://alobooks.vn/doc-online/50709/lan-cau-hon-thu-hai-chuong-01.html').build_tree()
# node = html.xpath("//div[@class='content node-book']")[0]
#  
# print ctext(node)


# Ad-hoc check: load one dantri.com.vn article straight from its URL, pick the
# article container by its id and print it as text.
a = lxml.html.parse('http://dantri.com.vn/suc-manh-so/samsung-trinh-lang-galaxy-note-3-man-hinh-57inch-voi-thiet-ke-sang-trong-775104.htm')
# [0] raises IndexError when the page has no element with this id.
ab = a.xpath("//div[@id='ctl00_IDContent_ctl00_divContent']")[0]
# Parenthesised single-argument form prints the same on Python 2 and is also
# valid syntax on Python 3.
# NOTE(review): lib.node2Text is defined outside this file; presumably it
# converts the node to plain text -- confirm against lib.
print(lib.node2Text(ab))


# aa = '''
# <a>con cho <b> tai sao</b><i>con cho den</i> the thi the nao</a>
# <a>hello</a>
# '''
# 
# el = lxml.html.fromstring(aa)
# for sel in el:
#     print sel.tag
# 
# print ctext(el)


